import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# Make the directory passed as the first command-line argument importable.
# The same argument is used further down to locate 'atp_tennis.csv'.
# Running the script without that argument raises IndexError here.
sys.path.append(sys.argv[1])

import pandas as pd
import pickle

# Read 'atp_tennis.csv' from the directory given as the first command-line
# argument, then convert its 'Date' column to pandas datetimes so that the
# date comparisons further down work on real timestamps.
csv_path = os.path.join(sys.argv[1], 'atp_tennis.csv')
atp_tennis = pd.read_csv(csv_path)
atp_tennis['Date'] = pd.to_datetime(atp_tennis['Date'])

print(atp_tennis['Date'])
# pickle.dump(atp_tennis['Date'],open("./ref_result/atp_tennis_Date.pkl","wb"))

import pandas as pd
import pickle


# Most recent value in the 'Date' column; used as the anchor for the
# five-year look-back window computed afterwards.
match_dates = atp_tennis['Date']
last_date = match_dates.max()

print(last_date)
# pickle.dump(last_date,open("./ref_result/last_date.pkl","wb"))

import pandas as pd
import pickle


# Keep only the rows dated within five calendar years of the latest match
# (the cutoff date itself is included).
five_years_ago = last_date - pd.DateOffset(years=5)
is_recent = atp_tennis['Date'] >= five_years_ago
recent_matches = atp_tennis[is_recent]

print(recent_matches)
# pickle.dump(recent_matches,open("./ref_result/recent_matches.pkl","wb"))

import pandas as pd
import pickle


# Stack both sides of every recent match into a single (Player_1, Rank_1)
# table, take the lowest rank number recorded for each player, and keep the
# ten players with the lowest such numbers. The result is indexed by player
# name with one 'Rank_1' column.
first_side = recent_matches[['Player_1', 'Rank_1']]
second_side = recent_matches[['Player_2', 'Rank_2']].rename(
    columns={'Player_2': 'Player_1', 'Rank_2': 'Rank_1'}
)
all_player_ranks = pd.concat([first_side, second_side])
lowest_rank_per_player = all_player_ranks.groupby('Player_1').min()
top_10_ranked_players = lowest_rank_per_player.sort_values('Rank_1').head(10)

print(top_10_ranked_players)
# pickle.dump(top_10_ranked_players,open("./ref_result/top_10_ranked_players.pkl","wb"))

import pandas as pd
import pickle


# Flatten the ranking table into a plain list of [player name, rank] rows.
# reset_index() moves the player name out of the index so it becomes the
# first element of each row.
ranking_rows = top_10_ranked_players.reset_index()
top_10_ranked_players_list = ranking_rows.values.tolist()

print(top_10_ranked_players_list)
# pickle.dump(top_10_ranked_players_list,open("./ref_result/top_10_ranked_players_list.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Pull the name (first element of each [name, rank] row) and trim surrounding
# whitespace.
# NOTE(review): the names are stripped here, but the Player_1 / Player_2 /
# Winner columns they are compared against later are not - this presumably
# relies on the CSV containing no padded names; confirm.
top_10_player_names = [row[0].strip() for row in top_10_ranked_players_list]

print(top_10_player_names)
# pickle.dump(top_10_player_names,open("./ref_result/top_10_player_names.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Recent matches played on 'Grass' in which at least one of the two players
# is among the selected top-10 names.
played_on_grass = recent_matches['Surface'] == 'Grass'
top_name_as_player_1 = recent_matches['Player_1'].isin(top_10_player_names)
top_name_as_player_2 = recent_matches['Player_2'].isin(top_10_player_names)
grass_matches = recent_matches[played_on_grass & (top_name_as_player_1 | top_name_as_player_2)]

print(grass_matches)
# pickle.dump(grass_matches,open("./ref_result/grass_matches.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Count grass matches per player across both sides of the draw.
#
# A plain `+` between the two value_counts() Series aligns on player name and
# yields NaN for anyone who appears in only one of the two columns, which the
# former fillna(0) then turned into "0 matches played". Series.add with
# fill_value=0 treats a missing side as zero appearances instead, so the real
# count is kept.
appearances_as_player_1 = grass_matches['Player_1'].value_counts()
appearances_as_player_2 = grass_matches['Player_2'].value_counts()
matches_played = appearances_as_player_1.add(appearances_as_player_2, fill_value=0)

# Restrict to the top-10 names, in that order. reindex() inserts 0 for a name
# that has no grass match at all, whereas selecting with a list of labels
# raises KeyError for a missing name.
matches_played = matches_played.reindex(top_10_player_names, fill_value=0)

print(matches_played)
# pickle.dump(matches_played,open("./ref_result/matches_played.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Count grass-match wins per player from the 'Winner' column, then restrict to
# the top-10 names in that order.
#
# reindex() is used rather than selecting with a list of labels: a top-10
# player with no win in grass_matches is absent from value_counts(), and
# list-based label selection raises KeyError for an absent name instead of
# producing the 0 that the original fillna(0) was meant to supply.
wins_per_player = grass_matches['Winner'].value_counts()
matches_won = wins_per_player.reindex(top_10_player_names, fill_value=0)

print(matches_won)
# pickle.dump(matches_won,open("./ref_result/matches_won.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Per-player grass win rate in percent: wins divided by matches played,
# aligned on player name. A zero match count is not special-cased here, so the
# result for such a player is whatever pandas float division produces.
win_ratio = matches_won / matches_played
win_rate_percentage = win_ratio * 100

print(win_rate_percentage)
# pickle.dump(win_rate_percentage,open("./ref_result/win_rate_percentage.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Grass matches in which BOTH players are among the top-10 names.
player_1_is_top = grass_matches['Player_1'].isin(top_10_player_names)
player_2_is_top = grass_matches['Player_2'].isin(top_10_player_names)
head_to_head_matches = grass_matches[player_1_is_top & player_2_is_top]

print(head_to_head_matches)
# pickle.dump(head_to_head_matches,open("./ref_result/head_to_head_matches.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Square table of zeros labelled by player name on both axes; the loop that
# follows increments cell [winner, loser] for every head-to-head match.
#
# The size is taken from the name list instead of a hard-coded 10 so that the
# table can still be built when fewer than ten players were selected (a fixed
# 10x10 array raises ValueError when it does not match the label count).
player_count = len(top_10_player_names)
head_to_head_matrix = pd.DataFrame(
    np.zeros((player_count, player_count)),
    columns=top_10_player_names,
    index=top_10_player_names,
)

print(head_to_head_matrix)
# pickle.dump(head_to_head_matrix,open("./ref_result/head_to_head_matrix.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Tally every head-to-head match: the row is the winner, the column is the
# opponent who lost, and the cell counts how often that result occurred.
for _, match in head_to_head_matches.iterrows():
    winner = match['Winner']
    # The loser is whichever of the two listed players is not the winner.
    if match['Player_1'] == winner:
        loser = match['Player_2']
    else:
        loser = match['Player_1']
    head_to_head_matrix.loc[winner, loser] += 1

print(head_to_head_matrix)
# pickle.dump(head_to_head_matrix,open("./ref_result/head_to_head_matrix.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Round buckets used for the per-round breakdown below. 'Early Round' is a
# synthetic bucket; the other entries are compared directly against the
# 'Round' column.
rounds = [
    'Early Round',
    'Quarterfinals',
    'Semifinals',
    'Final',
]

print(rounds)
# pickle.dump(rounds,open("./ref_result/rounds.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Empty container for per-player, per-round performance records.
performance_data_list = list()

print(performance_data_list)
# pickle.dump(performance_data_list,open("./ref_result/performance_data_list.pkl","wb"))

import pandas as pd
import numpy as np
import pickle


# Per-player, per-round win rate on grass.
#
# NOTE(review): this loop rebinds the module-level names `matches_played` and
# `matches_won` (previously per-player Series) to plain integers, and it does
# not store anything in `performance_data_list`; only the win rate of the last
# (player, round) pair survives and is printed. Presumably a later step
# records the values - confirm before relying on these names afterwards.

# Rows whose 'Round' label belongs to the synthetic 'Early Round' bucket.
round_labels = grass_matches['Round']
is_early_round = (
    (round_labels == '1st Round')
    | (round_labels == '2nd Round')
    | (round_labels == '3rd Round')
    | (round_labels == '4th Round')
)

for player in top_10_player_names:
    # Rows in which this player appears on either side of the match.
    involves_player = (grass_matches['Player_1'] == player) | (grass_matches['Player_2'] == player)

    for round_name in rounds:
        # 'Early Round' maps to several labels; every other bucket is matched
        # literally against the 'Round' column.
        if round_name != 'Early Round':
            in_round = grass_matches['Round'] == round_name
        else:
            in_round = is_early_round

        player_round_matches = grass_matches[in_round & involves_player]

        # Matches played and won by the player in this round bucket.
        matches_played = len(player_round_matches)
        won_rows = player_round_matches[player_round_matches['Winner'] == player]
        matches_won = len(won_rows)

        # Percentage of matches won; defined as 0 when none were played.
        if matches_played > 0:
            win_rate = (matches_won / matches_played) * 100
        else:
            win_rate = 0

print(win_rate)
# pickle.dump(win_rate,open("./ref_result/win_rate.pkl","wb"))

